In [1]:
import os
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [2]:
os.chdir("..")
In [3]:
from Signals.BitcoinData import BitcoinData
from Signals.WikipediaData import WikipediaData
from Signals.FXData import FXData
Download data for relevant range and normalize.
In [4]:
bd = BitcoinData()
bitcoin = bd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))
In [5]:
wd = WikipediaData()
wiki = wd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))
In [6]:
fxd = FXData()
fx = fxd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))
In [7]:
x = bitcoin.merge(fx, how='outer', left_index=True, right_index=True)\
.merge(wiki, how='outer', left_index=True, right_index=True)
Fill forward missing data.
In [8]:
x['USDCNY'].fillna(method='pad', inplace=True)
x['USDEUR'].fillna(method='pad', inplace=True)
x['VIX'].fillna(method='pad', inplace=True)
Add transformations.
In [9]:
x['next_open'] = x['Open'].shift(-1)
x['change'] = x['next_open'] - x['Open']
x['next_day_higher'] = x['change'].map(lambda x: x > 0)
In [10]:
def addlogret(df, col):
df[col+'_logret'] = np.log(df[col]) - np.log(df[col].shift(1))
def addewma(df, col):
for i in [3, 10, 30]:
df[col+'_ewma_'+str(i)] = df[col].ewm(halflife=i).mean()
def addewmvar(df, col):
for i in [3, 10, 30]:
df[col+'_ewmvar_'+str(i)] = df[col].ewm(halflife=i).var()
In [11]:
for col in ['Open', 'Volume', 'USDCNY', 'USDEUR', 'VIX', 'views']:
addlogret(x, col)
addewma(x, col)
addewma(x, col+"_logret")
addewmvar(x, col)
In [12]:
x.dropna(inplace=True)
Build logistic regression model.
In [13]:
y = x['next_day_higher']
In [14]:
x.drop(['next_day_higher'], axis=1, inplace=True)
In [15]:
from sklearn import linear_model
In [16]:
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(x, y)
Out[16]:
In [17]:
print("Model score: {:.1f}%".format(100*logreg.score(x, y)))
Test predictions.
In [18]:
from PerfAnalysis.PnL import PnL
In [19]:
pnl = PnL()
pnl.calc_pnl(x, y, price_col="Open")
Out[19]:
213% (annualized) return over the training set, even including 25 bp commission, but how about the final six months of the year?
In [20]:
bitcoin = bd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
wiki = wd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
fx = fxd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
x = bitcoin.merge(fx, how='outer', left_index=True, right_index=True)\
.merge(wiki, how='outer', left_index=True, right_index=True)
x['USDCNY'].fillna(method='pad', inplace=True)
x['USDEUR'].fillna(method='pad', inplace=True)
x['VIX'].fillna(method='pad', inplace=True)
x['next_open'] = x['Open'].shift(-1)
x['change'] = x['next_open'] - x['Open']
x['next_day_higher'] = x['change'].map(lambda x: x > 0)
for col in ['Open', 'Volume', 'USDCNY', 'USDEUR', 'VIX', 'views']:
addlogret(x, col)
addewma(x, col)
addewma(x, col+"_logret")
addewmvar(x, col)
x.drop(['next_day_higher'], axis=1, inplace=True)
x.dropna(inplace=True)
In [21]:
pred = logreg.predict(x)
In [22]:
pnl.calc_pnl(x, pred, price_col="Open")
Out[22]:
A ha, much less successful on the test dataset.